#!/usr/bin/python
# -*- coding: UTF-8 -*-
# author : bird.zhang@ximalaya.com

import pandas as pd

# 1. Read the CSV file
#
# NOTE: the same rows could alternatively be pulled from the MySQL table
# `tb_live_record_all` (filtered on actual_start_at >= 1514736000000) via
# pd.read_sql(..., index_col='id'); this script reads a local CSV file instead.

df = pd.read_csv('tb_live_record_all.csv', index_col='id', lineterminator='\n')

# Keep only rows whose `is_deleted` compares equal to False, and only the two
# columns used by the analysis. The explicit `== False` comparison is kept on
# purpose: the dtype of `is_deleted` is not visible here, and `~column` would
# behave differently on non-boolean data.
df = df.loc[df['is_deleted'] == False, ['actual_start_at', 'uid']]  # noqa: E712

# Number of records per uid: a one-column frame indexed by `uid`, with the
# per-uid count stored in a column named `cnt`.
uid_grouped_data = (
    df.groupby('uid')[['uid']]
    .count()
    .rename(columns={'uid': 'cnt'})
)

uid_grouped_data.info()

# Distribution of those counts: for every distinct `cnt` value, how many uids
# have exactly that many records.
records_per_cnt = uid_grouped_data.groupby('cnt')
cnt_grouped_data = records_per_cnt[['cnt']].count()

cnt_grouped_data.info()

# Plot the distribution using pandas' default plot kind.
cnt_grouped_data.plot()


# 2. Group by time period, then count()
